// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2000-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.dev.test.translit;

import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

public final class TestUtility {

    /**
     * Formats a single UTF-16 code unit as exactly four uppercase
     * hexadecimal digits, left-padded with zeros.
     *
     * @param ch the code unit to format
     * @return the four-digit hexadecimal form of {@code ch}, e.g. {@code "0061"} for {@code 'a'}
     */
    public static String hex(char ch) {
        final String digits = Integer.toString(ch, 16).toUpperCase();
        final StringBuilder padded = new StringBuilder(4);
        // A char yields at most four hex digits, so this adds 0 to 3 zeros.
        for (int width = digits.length(); width < 4; ++width) {
            padded.append('0');
        }
        return padded.append(digits).toString();
    }

    /**
     * Formats an integer (typically a Unicode code point) as uppercase
     * hexadecimal, left-padded with zeros to a minimum of four characters.
     * Values that need more than four digits, such as supplementary code
     * points, are returned unpadded at their natural width.
     *
     * <p>Negative values get no special treatment: the signed text produced
     * by {@link Integer#toString(int, int)} is padded like any other.
     *
     * @param ch the value to format
     * @return the hexadecimal form of {@code ch}, at least four characters long
     */
    public static String hex(int ch) {
        String digits = Integer.toString(ch, 16).toUpperCase();
        if (digits.length() >= 4) {
            // Already wide enough; asking for a negative pad length would throw.
            return digits;
        }
        // "0000".substring(n) yields exactly (4 - n) zeros.
        return "0000".substring(digits.length()) + digits;
    }

    /**
     * Formats every UTF-16 code unit of {@code s} as hexadecimal, using a
     * comma as the separator between units.
     *
     * @param s the string to format
     * @return the result of {@code hex(s, ",")}
     */
    public static String hex(String s) {
        final String defaultSeparator = ",";
        return hex(s, defaultSeparator);
    }

    /**
     * Formats every UTF-16 code unit of {@code s} as four hexadecimal digits
     * (via {@code hex(char)}), placing {@code sep} between consecutive units.
     * No separator is added before the first unit or after the last.
     *
     * @param s   the string to format; an empty string yields an empty result
     * @param sep the text inserted between consecutive code units
     * @return the separator-joined hexadecimal form of {@code s}
     */
    public static String hex(String s, String sep) {
        if (s.length() == 0) return "";
        // Build in a StringBuilder so long inputs are not re-copied on every append.
        StringBuilder result = new StringBuilder(s.length() * 5);
        result.append(hex(s.charAt(0)));
        for (int i = 1; i < s.length(); ++i) {
            result.append(sep).append(hex(s.charAt(i)));
        }
        return result.toString();
    }

    /**
     * Replaces every non-overlapping, case-sensitive occurrence of
     * {@code toBeReplaced} in {@code source} with {@code replacement},
     * scanning from left to right.
     *
     * <p>An empty {@code toBeReplaced} matches nothing, so {@code source} is
     * returned unchanged. Without this guard the scan would match at the same
     * index forever and never terminate.
     *
     * @param source       the text to search
     * @param toBeReplaced the literal substring to look for
     * @param replacement  the text substituted for each occurrence
     * @return {@code source} with all occurrences replaced
     */
    public static String replace(String source, String toBeReplaced, String replacement) {
        int len = toBeReplaced.length();
        if (len == 0) {
            return source;
        }
        StringBuilder results = new StringBuilder(source.length());
        int start = 0; // first index not yet copied to results
        int hit;
        while ((hit = source.indexOf(toBeReplaced, start)) >= 0) {
            // Copy the unmatched stretch, then the replacement, and resume after the match.
            results.append(source, start, hit).append(replacement);
            start = hit + len;
        }
        return results.append(source, start, source.length()).toString();
    }

    /**
     * Walks {@code source} one code point at a time and builds a new string
     * in which every code point contained in {@code set} is replaced by
     * {@code replacement}; all other code points are copied through as-is.
     *
     * @param source      the text to process
     * @param set         the code points to replace
     * @param replacement the text substituted for each matching code point
     * @return the rewritten string
     */
    public static String replaceAll(String source, UnicodeSet set, String replacement) {
        final StringBuilder out = new StringBuilder();
        int offset = 0;
        while (offset < source.length()) {
            final int codePoint = UTF16.charAt(source, offset);
            if (!set.contains(codePoint)) {
                out.appendCodePoint(codePoint);
            } else {
                out.append(replacement);
            }
            // Step by one or two code units, depending on the code point just read.
            offset += UTF16.getCharCount(codePoint);
        }
        return out.toString();
    }

    // The old script/block lookup code below has been commented out.
    /*
      public static byte getScript(char c) {
        return getScript(getBlock(c));
      }

      public static byte getScript(byte block) {
        return blockToScript[block];
      }

      public static byte getBlock(char c) {
        int index = c >> 7;
        byte block = charToBlock[index];
        while (block < 0) { // take care of exceptions, blocks split across 128 boundaries
            int[] tuple = split[-block-1];
            if (c < tuple[0]) block = (byte)tuple[1];
            else block = (byte)tuple[2];
        }
        return block;
      }

      // returns next letter of script, or 0xFFFF if done

      public static char getNextLetter(char c, byte script) {
          while (c < 0xFFFF) {
              ++c;
              if (getScript(c) == script && Character.isLetter(c)) {
                  return c;
              }
          }
          return c;
      }

      // Supplements to Character methods; these methods go through
      // UCharacter if possible.  If not, they fall back to Character.

      public static boolean isUnassigned(char c) {
          try {
              return UCharacter.getType(c) == UCharacterCategory.UNASSIGNED;
          } catch (NullPointerException e) {
              System.out.print("");
          }
          return Character.getType(c) == Character.UNASSIGNED;
      }

      public static boolean isLetter(char c) {
          try {
              return UCharacter.isLetter(c);
          } catch (NullPointerException e) {
              System.out.print("");
          }
          return Character.isLetter(c);
      }

    public static void main(String[] args) {
      System.out.println("Blocks: ");
      byte lastblock = -128;
      for (char cc = 0; cc < 0xFFFF; ++cc) {
        byte block = TestUtility.getBlock(cc);
        if (block != lastblock) {
          System.out.println(TestUtility.hex(cc) + "\t" + block);
          lastblock = block;
        }
      }
      System.out.println();
      System.out.println("Scripts: ");
      byte lastScript = -128;
      for (char cc = 0; cc < 0xFFFF; ++cc) {
        byte script = TestUtility.getScript(cc);
        if (script != lastScript) {
          System.out.println(TestUtility.hex(cc) + "\t" + script);
          lastScript = script;
        }
      }
    }



      public static final byte // SCRIPT CODE
          COMMON_SCRIPT = 0,
          LATIN_SCRIPT = 1,
          GREEK_SCRIPT = 2,
          CYRILLIC_SCRIPT = 3,
          ARMENIAN_SCRIPT = 4,
          HEBREW_SCRIPT = 5,
          ARABIC_SCRIPT = 6,
          SYRIAC_SCRIPT = 7,
          THAANA_SCRIPT = 8,
          DEVANAGARI_SCRIPT = 9,
          BENGALI_SCRIPT = 10,
          GURMUKHI_SCRIPT = 11,
          GUJARATI_SCRIPT = 12,
          ORIYA_SCRIPT = 13,
          TAMIL_SCRIPT = 14,
          TELUGU_SCRIPT = 15,
          KANNADA_SCRIPT = 16,
          MALAYALAM_SCRIPT = 17,
          SINHALA_SCRIPT = 18,
          THAI_SCRIPT = 19,
          LAO_SCRIPT = 20,
          TIBETAN_SCRIPT = 21,
          MYANMAR_SCRIPT = 22,
          GEORGIAN_SCRIPT = 23,
          JAMO_SCRIPT = 24,
          HANGUL_SCRIPT = 25,
          ETHIOPIC_SCRIPT = 26,
          CHEROKEE_SCRIPT = 27,
          ABORIGINAL_SCRIPT = 28,
          OGHAM_SCRIPT = 29,
          RUNIC_SCRIPT = 30,
          KHMER_SCRIPT = 31,
          MONGOLIAN_SCRIPT = 32,
          HIRAGANA_SCRIPT = 33,
          KATAKANA_SCRIPT = 34,
          BOPOMOFO_SCRIPT = 35,
          HAN_SCRIPT = 36,
          YI_SCRIPT = 37;

      public static final byte // block code
          RESERVED_BLOCK = 0,
          BASIC_LATIN = 1,
          LATIN_1_SUPPLEMENT = 2,
          LATIN_EXTENDED_A = 3,
          LATIN_EXTENDED_B = 4,
          IPA_EXTENSIONS = 5,
          SPACING_MODIFIER_LETTERS = 6,
          COMBINING_DIACRITICAL_MARKS = 7,
          GREEK = 8,
          CYRILLIC = 9,
          ARMENIAN = 10,
          HEBREW = 11,
          ARABIC = 12,
          SYRIAC = 13,
          THAANA = 14,
          DEVANAGARI = 15,
          BENGALI = 16,
          GURMUKHI = 17,
          GUJARATI = 18,
          ORIYA = 19,
          TAMIL = 20,
          TELUGU = 21,
          KANNADA = 22,
          MALAYALAM = 23,
          SINHALA = 24,
          THAI = 25,
          LAO = 26,
          TIBETAN = 27,
          MYANMAR = 28,
          GEORGIAN = 29,
          HANGUL_JAMO = 30,
          ETHIOPIC = 31,
          CHEROKEE = 32,
          UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,
          OGHAM = 34,
          RUNIC = 35,
          KHMER = 36,
          MONGOLIAN = 37,
          LATIN_EXTENDED_ADDITIONAL = 38,
          GREEK_EXTENDED = 39,
          GENERAL_PUNCTUATION = 40,
          SUPERSCRIPTS_AND_SUBSCRIPTS = 41,
          CURRENCY_SYMBOLS = 42,
          COMBINING_MARKS_FOR_SYMBOLS = 43,
          LETTERLIKE_SYMBOLS = 44,
          NUMBER_FORMS = 45,
          ARROWS = 46,
          MATHEMATICAL_OPERATORS = 47,
          MISCELLANEOUS_TECHNICAL = 48,
          CONTROL_PICTURES = 49,
          OPTICAL_CHARACTER_RECOGNITION = 50,
          ENCLOSED_ALPHANUMERICS = 51,
          BOX_DRAWING = 52,
          BLOCK_ELEMENTS = 53,
          GEOMETRIC_SHAPES = 54,
          MISCELLANEOUS_SYMBOLS = 55,
          DINGBATS = 56,
          BRAILLE_PATTERNS = 57,
          CJK_RADICALS_SUPPLEMENT = 58,
          KANGXI_RADICALS = 59,
          IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
          CJK_SYMBOLS_AND_PUNCTUATION = 61,
          HIRAGANA = 62,
          KATAKANA = 63,
          BOPOMOFO = 64,
          HANGUL_COMPATIBILITY_JAMO = 65,
          KANBUN = 66,
          BOPOMOFO_EXTENDED = 67,
          ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
          CJK_COMPATIBILITY = 69,
          CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
          CJK_UNIFIED_IDEOGRAPHS = 71,
          YI_SYLLABLES = 72,
          YI_RADICALS = 73,
          HANGUL_SYLLABLES = 74,
          HIGH_SURROGATES = 75,
          HIGH_PRIVATE_USE_SURROGATES = 76,
          LOW_SURROGATES = 77,
          PRIVATE_USE = 78,
          CJK_COMPATIBILITY_IDEOGRAPHS = 79,
          ALPHABETIC_PRESENTATION_FORMS = 80,
          ARABIC_PRESENTATION_FORMS_A = 81,
          COMBINING_HALF_MARKS = 82,
          CJK_COMPATIBILITY_FORMS = 83,
          SMALL_FORM_VARIANTS = 84,
          ARABIC_PRESENTATION_FORMS_B = 85,
          SPECIALS = 86,
          HALFWIDTH_AND_FULLWIDTH_FORMS = 87;

      static final byte[] blockToScript = {
          COMMON_SCRIPT, // 0, <RESERVED_BLOCK>
          LATIN_SCRIPT, // 1, BASIC_LATIN
          LATIN_SCRIPT, // 2, LATIN_1_SUPPLEMENT
          LATIN_SCRIPT, // 3, LATIN_EXTENDED_A
          LATIN_SCRIPT, // 4, LATIN_EXTENDED_B
          LATIN_SCRIPT, // 5, IPA_EXTENSIONS
          COMMON_SCRIPT, // 6, SPACING_MODIFIER_LETTERS
          COMMON_SCRIPT, // 7, COMBINING_DIACRITICAL_MARKS
          GREEK_SCRIPT, // 8, GREEK
          CYRILLIC_SCRIPT, // 9, CYRILLIC
          ARMENIAN_SCRIPT, // 10, ARMENIAN
          HEBREW_SCRIPT, // 11, HEBREW
          ARABIC_SCRIPT, // 12, ARABIC
          SYRIAC_SCRIPT, // 13, SYRIAC
          THAANA_SCRIPT, // 14, THAANA
          DEVANAGARI_SCRIPT, // 15, DEVANAGARI
          BENGALI_SCRIPT, // 16, BENGALI
          GURMUKHI_SCRIPT, // 17, GURMUKHI
          GUJARATI_SCRIPT, // 18, GUJARATI
          ORIYA_SCRIPT, // 19, ORIYA
          TAMIL_SCRIPT, // 20, TAMIL
          TELUGU_SCRIPT, // 21, TELUGU
          KANNADA_SCRIPT, // 22, KANNADA
          MALAYALAM_SCRIPT, // 23, MALAYALAM
          SINHALA_SCRIPT, // 24, SINHALA
          THAI_SCRIPT, // 25, THAI
          LAO_SCRIPT, // 26, LAO
          TIBETAN_SCRIPT, // 27, TIBETAN
          MYANMAR_SCRIPT, // 28, MYANMAR
          GEORGIAN_SCRIPT, // 29, GEORGIAN
          JAMO_SCRIPT, // 30, HANGUL_JAMO
          ETHIOPIC_SCRIPT, // 31, ETHIOPIC
          CHEROKEE_SCRIPT, // 32, CHEROKEE
          ABORIGINAL_SCRIPT, // 33, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
          OGHAM_SCRIPT, // 34, OGHAM
          RUNIC_SCRIPT, // 35, RUNIC
          KHMER_SCRIPT, // 36, KHMER
          MONGOLIAN_SCRIPT, // 37, MONGOLIAN
          LATIN_SCRIPT, // 38, LATIN_EXTENDED_ADDITIONAL
          GREEK_SCRIPT, // 39, GREEK_EXTENDED
          COMMON_SCRIPT, // 40, GENERAL_PUNCTUATION
          COMMON_SCRIPT, // 41, SUPERSCRIPTS_AND_SUBSCRIPTS
          COMMON_SCRIPT, // 42, CURRENCY_SYMBOLS
          COMMON_SCRIPT, // 43, COMBINING_MARKS_FOR_SYMBOLS
          COMMON_SCRIPT, // 44, LETTERLIKE_SYMBOLS
          COMMON_SCRIPT, // 45, NUMBER_FORMS
          COMMON_SCRIPT, // 46, ARROWS
          COMMON_SCRIPT, // 47, MATHEMATICAL_OPERATORS
          COMMON_SCRIPT, // 48, MISCELLANEOUS_TECHNICAL
          COMMON_SCRIPT, // 49, CONTROL_PICTURES
          COMMON_SCRIPT, // 50, OPTICAL_CHARACTER_RECOGNITION
          COMMON_SCRIPT, // 51, ENCLOSED_ALPHANUMERICS
          COMMON_SCRIPT, // 52, BOX_DRAWING
          COMMON_SCRIPT, // 53, BLOCK_ELEMENTS
          COMMON_SCRIPT, // 54, GEOMETRIC_SHAPES
          COMMON_SCRIPT, // 55, MISCELLANEOUS_SYMBOLS
          COMMON_SCRIPT, // 56, DINGBATS
          COMMON_SCRIPT, // 57, BRAILLE_PATTERNS
          HAN_SCRIPT, // 58, CJK_RADICALS_SUPPLEMENT
          HAN_SCRIPT, // 59, KANGXI_RADICALS
          HAN_SCRIPT, // 60, IDEOGRAPHIC_DESCRIPTION_CHARACTERS
          COMMON_SCRIPT, // 61, CJK_SYMBOLS_AND_PUNCTUATION
          HIRAGANA_SCRIPT, // 62, HIRAGANA
          KATAKANA_SCRIPT, // 63, KATAKANA
          BOPOMOFO_SCRIPT, // 64, BOPOMOFO
          JAMO_SCRIPT, // 65, HANGUL_COMPATIBILITY_JAMO
          HAN_SCRIPT, // 66, KANBUN
          BOPOMOFO_SCRIPT, // 67, BOPOMOFO_EXTENDED
          COMMON_SCRIPT, // 68, ENCLOSED_CJK_LETTERS_AND_MONTHS
          COMMON_SCRIPT, // 69, CJK_COMPATIBILITY
          HAN_SCRIPT, // 70, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
          HAN_SCRIPT, // 71, CJK_UNIFIED_IDEOGRAPHS
          YI_SCRIPT, // 72, YI_SYLLABLES
          YI_SCRIPT, // 73, YI_RADICALS
          HANGUL_SCRIPT, // 74, HANGUL_SYLLABLES
          COMMON_SCRIPT, // 75, HIGH_SURROGATES
          COMMON_SCRIPT, // 76, HIGH_PRIVATE_USE_SURROGATES
          COMMON_SCRIPT, // 77, LOW_SURROGATES
          COMMON_SCRIPT, // 78, PRIVATE_USE
          HAN_SCRIPT, // 79, CJK_COMPATIBILITY_IDEOGRAPHS
          COMMON_SCRIPT, // 80, ALPHABETIC_PRESENTATION_FORMS
          ARABIC_SCRIPT, // 81, ARABIC_PRESENTATION_FORMS_A
          COMMON_SCRIPT, // 82, COMBINING_HALF_MARKS
          COMMON_SCRIPT, // 83, CJK_COMPATIBILITY_FORMS
          COMMON_SCRIPT, // 84, SMALL_FORM_VARIANTS
          ARABIC_SCRIPT, // 85, ARABIC_PRESENTATION_FORMS_B
          COMMON_SCRIPT, // 86, SPECIALS
          COMMON_SCRIPT, // 87, HALFWIDTH_AND_FULLWIDTH_FORMS
          COMMON_SCRIPT, // 88, SPECIALS
      };

      // could be further reduced to a byte array, but I didn't bother.
      static final int[][] split = {
          {0x0250, 4, 5}, // -1
          {0x02B0, 5, 6}, // -2
          {0x0370, 7, 8}, // -3
          {0x0530, 0, 10}, // -4
          {0x0590, 10, 11}, // -5
          {0x0750, 13, 0}, // -6
          {0x07C0, 14, 0}, // -7
          {0x10A0, 28, 29}, // -8
          {0x13A0, 0, 32}, // -9
          {0x16A0, 34, 35}, // -10
          {0x18B0, 37, 0}, // -11
          {0x2070, 40, 41}, // -12
          {0x20A0, 41, -31}, // -13
          {0x2150, 44, 45}, // -14
          {0x2190, 45, 46}, // -15
          {0x2440, 49, -32}, // -16
          {0x25A0, 53, 54}, // -17
          {0x27C0, 56, 0}, // -18
          {0x2FE0, 59, -33}, // -19
          {0x3040, 61, 62}, // -20
          {0x30A0, 62, 63}, // -21
          {0x3130, 64, 65}, // -22
          {0x3190, 65, -34}, // -23
          {0x4DB6, 70, 0}, // -24
          {0xA490, 72, -35}, // -25
          {0xD7A4, 74, 0}, // -26
          {0xFB50, 80, 81}, // -27
          {0xFE20, 0, -36}, // -28
          {0xFEFF, 85, 86}, // -29
          {0xFFF0, 87, -37}, // -30
          {0x20D0, 42, 43}, // -31
          {0x2460, 50, 51}, // -32
          {0x2FF0, 0, 60}, // -33
          {0x31A0, 66, -38}, // -34
          {0xA4D0, 73, 0}, //-35
          {0xFE30, 82, -39}, //-36
          {0xFFFE, 88, 0}, //-37
          {0x31C0, 67, 0}, // -38
          {0xFE50, 83, -40}, //-39
          {0xFE70, 84, 85} // -40
      };

      static final byte[] charToBlock = {
        1, 2, 3, 4, -1, -2, -3, 8, 9, 9, -4, -5, 12, 12, -6, -7,
        0, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 27,
        28, -8, 30, 30, 31, 31, 31, -9, 33, 33, 33, 33, 33, -10, 0, 36,
        37, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 38, 39, 39,
        -12, -13, -14, -15, 47, 47, 48, 48, -16, 51, 52, -17, 55, 55, 56, -18,
        57, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, -19,
        -20, -21, -22, -23, 68, 68, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
        70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
        70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
        70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -24, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
        72, 72, 72, 72, 72, 72, 72, 72, 72, -25, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 74, 74, 74, 74, 74, 74, 74, 74,
        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
        74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, -26,
        75, 75, 75, 75, 75, 75, 75, 76, 77, 77, 77, 77, 77, 77, 77, 77,
        78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
        78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
        78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
        78, 78, 79, 79, 79, 79, -27, 81, 81, 81, 81, 81, -28, -29, 87, -30
      };
      */
}
